In [1]:
import pandas as pd

# Load the CSV file
file_path = r'C:\Users\Dell\Desktop\Trading Analysis\adani.csv'  # Replace with your actual file path
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())
             timestamp     symbol                    company     open  \
0  1025461800000000000        ACC                ACC Limited  107.789   
1  1025461800000000000  AMBUJACEM     Ambuja Cements Limited   17.278   
2  1025461800000000000   ADANIENT  Adani Enterprises Limited   -0.010   
3  1025548200000000000        ACC                ACC Limited  108.496   
4  1025548200000000000  AMBUJACEM     Ambuja Cements Limited   17.573   

      high      low    close   volume  dividends  stock_splits  
0  109.810  107.115  108.968   659631        0.0           0.0  
1   17.660   17.202   17.573   630442        0.0           0.0  
2   -0.011   -0.010   -0.010  1080397        0.0           0.0  
3  110.046  107.789  108.059   282660        0.0           0.0  
4   17.748   17.409   17.560  1007265        0.0           0.0  
In [2]:
data
Out[2]:
timestamp symbol company open high low close volume dividends stock_splits
0 1025461800000000000 ACC ACC Limited 107.789 109.810 107.115 108.968 659631 0.0 0.0
1 1025461800000000000 AMBUJACEM Ambuja Cements Limited 17.278 17.660 17.202 17.573 630442 0.0 0.0
2 1025461800000000000 ADANIENT Adani Enterprises Limited -0.010 -0.011 -0.010 -0.010 1080397 0.0 0.0
3 1025548200000000000 ACC ACC Limited 108.496 110.046 107.789 108.059 282660 0.0 0.0
4 1025548200000000000 AMBUJACEM Ambuja Cements Limited 17.573 17.748 17.409 17.560 1007265 0.0 0.0
... ... ... ... ... ... ... ... ... ... ...
31492 1677090600000000000 AMBUJACEM Ambuja Cements Limited 336.000 343.850 331.350 336.900 10662112 0.0 0.0
31493 1677090600000000000 ADANIPORTS Adani Ports and Special Economic Zone Limited 539.500 558.150 533.650 551.850 10709730 0.0 0.0
31494 1677090600000000000 ADANIENT Adani Enterprises Limited 1380.000 1438.000 1350.000 1382.650 8904676 0.0 0.0
31495 1677090600000000000 ATGL Adani Total Gas Limited 791.350 791.350 791.350 791.350 51867 0.0 0.0
31496 1677090600000000000 NDTV New Delhi Television Limited 195.250 201.700 193.300 199.100 228676 0.0 0.0

31497 rows × 10 columns

In [4]:
# Summary statistics
summary_stats = data.describe()
print(summary_stats)
          timestamp          open          high           low         close  \
count  3.149700e+04  31497.000000  31497.000000  31497.000000  31497.000000   
mean   1.409762e+18    404.542261    411.745505    396.689603    404.171407   
std    1.817938e+17    623.108375    633.783423    610.711441    622.359535   
min    1.025462e+18     -0.011000     -0.011000     -0.011000     -0.011000   
25%    1.267036e+18     59.423000     60.773000     58.000000     59.238000   
50%    1.434911e+18    140.100000    143.201000    137.100000    139.993000   
75%    1.569868e+18    384.277000    392.376000    377.043000    384.034000   
max    1.677091e+18   4175.000000   4236.750000   4066.400000   4165.300000   

             volume     dividends  stock_splits  
count  3.149700e+04  31497.000000  31497.000000  
mean   3.594056e+06      0.017824      0.000381  
std    7.932287e+06      0.530637      0.041405  
min    0.000000e+00      0.000000      0.000000  
25%    3.934800e+05      0.000000      0.000000  
50%    1.347016e+06      0.000000      0.000000  
75%    3.874840e+06      0.000000      0.000000  
max    2.421999e+08     58.000000      5.000000  
In [12]:
import matplotlib.pyplot as plt
import seaborn as sns

# Set the aesthetic style of the plots
sns.set(style="whitegrid")

# Plot distribution of key numerical variables
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(14, 18))

# Plot distributions
sns.histplot(data['open'], kde=True, ax=axes[0, 0])
axes[0, 0].set_title('Distribution of Opening Prices')

sns.histplot(data['high'], kde=True, ax=axes[0, 1])
axes[0, 1].set_title('Distribution of High Prices')

sns.histplot(data['low'], kde=True, ax=axes[1, 0])
axes[1, 0].set_title('Distribution of Low Prices')

sns.histplot(data['close'], kde=True, ax=axes[1, 1])
axes[1, 1].set_title('Distribution of Closing Prices')

sns.histplot(data['volume'], kde=True, ax=axes[2, 0])
axes[2, 0].set_title('Distribution of Volume')

sns.histplot(data['dividends'], kde=True, ax=axes[2, 1])
axes[2, 1].set_title('Distribution of Dividends')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [4]:
import matplotlib.pyplot as plt
import seaborn as sns

# Select only numeric columns
numeric_data = data.select_dtypes(include=['float64', 'int64'])

# Calculate correlation matrix
correlation_matrix = numeric_data.corr()

# Plot heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Matrix Heatmap')
plt.show()
No description has been provided for this image

The heatmap in the provided file visualizes the correlations between various stock attributes like open, high, low, close, and volume. It uses a color gradient to represent the strength and direction of these correlations, where darker colors indicate stronger relationships and lighter colors indicate weaker ones. This visualization helps identify patterns, such as a strong positive correlation between high and close prices, indicating that higher highs often coincide with higher closing prices. Additionally, the heatmap can reveal how trading volume correlates with price movements, aiding in understanding the stock's behavior and informing trading strategies.¶

In [8]:
# Pair plot to visualize relationships between variables
sns.pairplot(data[['open', 'high', 'low', 'close', 'volume']])
plt.show()
No description has been provided for this image
In [9]:
# Check for missing values
missing_values = data.isnull().sum()
print("Missing values in each column:\n", missing_values)
Missing values in each column:
 timestamp       0
symbol          0
company         0
open            0
high            0
low             0
close           0
volume          0
dividends       0
stock_splits    0
dtype: int64

TIME SERIES ANALYSIS

In [10]:
# Convert timestamp to datetime
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='ns')

# Plot time series for opening prices
plt.figure(figsize=(14, 7))
sns.lineplot(x='timestamp', y='open', data=data)
plt.title('Opening Prices Over Time')
plt.xlabel('Time')
plt.ylabel('Opening Price')
plt.show()
No description has been provided for this image

BOX PLOTS TO DIRECT OUTLIERS

In [14]:
# Box plots for price variables
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10))

sns.boxplot(data['open'], ax=axes[0, 0])
axes[0, 0].set_title('Box Plot of Opening Prices')

sns.boxplot(data['high'], ax=axes[0, 1])
axes[0, 1].set_title('Box Plot of High Prices')

sns.boxplot(data['low'], ax=axes[1, 0])
axes[1, 0].set_title('Box Plot of Low Prices')

sns.boxplot(data['close'], ax=axes[1, 1])
axes[1, 1].set_title('Box Plot of Closing Prices')

plt.tight_layout()
plt.show()
No description has been provided for this image

Volatility Analysis¶

In [5]:
# Calculate daily price change (close - open)
data['price_change'] = data['close'] - data['open']

# Plot price change
plt.figure(figsize=(6, 3))
sns.histplot(data['price_change'], kde=True)
plt.title('Distribution of Daily Price Change')
plt.xlabel('Daily Price Change')
plt.ylabel('Frequency')
plt.show()

# Plot time series of daily price change
plt.figure(figsize=(10, 3))
sns.lineplot(x='timestamp', y='price_change', data=data)
plt.title('Daily Price Change Over Time')
plt.xlabel('Time')
plt.ylabel('Daily Price Change')
plt.show()
No description has been provided for this image
No description has been provided for this image

Candlestick Chart: Provides a detailed view of stock price movements, showing the opening, closing, high, and low prices for each time period.¶

In [6]:
import plotly.graph_objects as go

# Create the candlestick chart
fig = go.Figure(data=[go.Candlestick(x=data['timestamp'],
                                     open=data['open'],
                                     high=data['high'],
                                     low=data['low'],
                                     close=data['close'])])

fig.update_layout(title='Candlestick Chart', xaxis_title='Time', yaxis_title='Price')
fig.show()

Moving Averages: Smooth out short-term price fluctuations and help identify the direction of the trend¶

In [7]:
# Calculate moving averages
data['MA20'] = data['close'].rolling(window=20).mean()
data['MA50'] = data['close'].rolling(window=50).mean()

# Plot moving averages
plt.figure(figsize=(14, 7))
plt.plot(data['timestamp'], data['close'], label='Closing Price')
plt.plot(data['timestamp'], data['MA20'], label='20-Day MA')
plt.plot(data['timestamp'], data['MA50'], label='50-Day MA')
plt.title('Moving Averages')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()
No description has been provided for this image

Daily Returns: Show the percentage change in stock price from one day to the next, useful for understanding volatility.¶

In [8]:
# Calculate daily returns
data['daily_return'] = data['close'].pct_change()

# Plot daily returns
plt.figure(figsize=(14, 7))
sns.lineplot(x='timestamp', y='daily_return', data=data)
plt.title('Daily Returns')
plt.xlabel('Time')
plt.ylabel('Daily Return')
plt.show()

# Plot distribution of daily returns
plt.figure(figsize=(14, 7))
sns.histplot(data['daily_return'].dropna(), kde=True)
plt.title('Distribution of Daily Returns')
plt.xlabel('Daily Return')
plt.ylabel('Frequency')
plt.show()
No description has been provided for this image
No description has been provided for this image

ollinger Bands: Help identify overbought or oversold conditions based on volatility.¶

In [9]:
# Calculate Bollinger Bands
data['MA20'] = data['close'].rolling(window=20).mean()
data['stddev'] = data['close'].rolling(window=20).std()
data['upper_band'] = data['MA20'] + (data['stddev'] * 2)
data['lower_band'] = data['MA20'] - (data['stddev'] * 2)

# Plot Bollinger Bands
plt.figure(figsize=(14, 7))
plt.plot(data['timestamp'], data['close'], label='Closing Price')
plt.plot(data['timestamp'], data['MA20'], label='20-Day MA')
plt.plot(data['timestamp'], data['upper_band'], label='Upper Band')
plt.plot(data['timestamp'], data['lower_band'], label='Lower Band')
plt.fill_between(data['timestamp'], data['upper_band'], data['lower_band'], alpha=0.2)
plt.title('Bollinger Bands')
plt.xlabel('Time')
plt.ylabel('Price')
plt.legend()
plt.show()
No description has been provided for this image

Volume by Price: Shows the total trading volume at different price levels, highlighting areas of significant trading activity.¶

In [10]:
import numpy as np

# Define price bins
bins = np.linspace(data['low'].min(), data['high'].max(), 50)
data['price_bin'] = pd.cut(data['close'], bins)

# Calculate volume by price bin
volume_by_price = data.groupby('price_bin')['volume'].sum()

# Plot volume by price
plt.figure(figsize=(14, 7))
volume_by_price.plot(kind='barh')
plt.title('Volume by Price')
plt.xlabel('Volume')
plt.ylabel('Price Bin')
plt.show()
C:\Users\Dell\AppData\Local\Temp\ipykernel_1292\47422830.py:8: FutureWarning:

The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.

No description has been provided for this image
In [ ]: